LBP Descriptor Analysis


In [2]:
%pylab inline
from tools import *
from PythonWrapper.descriptors import *


Populating the interactive namespace from numpy and matplotlib

LBP Descriptor


In [3]:
lbf_descs = np.load("../descriptors/lbp_lfwa.npy")
lbf_descs = lbf_descs.reshape((lbf_descs.shape[0], lbf_descs.shape[1]/256, 256))

Variance Analysis and Uniform Property


In [18]:
variances = np.var(lbf_descs, axis=(0,1))
variances_percentage = variances / np.sum(variances)
plot(variances_percentage)
xlabel("LBP values")
ylabel("Variance distribution")
_ = xlim(xmax=255)



In [5]:
def isUniform(x):
    uniform = True
    prev_x = x / 128
    transitions = 0
    
    for i in range(8):
        is_transition = (x%2 != prev_x%2)
        if is_transition:
            transitions += 1
        if transitions > 2:
            uniform = False
            break
        prev_x = x
        x /= 2
        
    return uniform

In [6]:
sorted_variances = sorted(enumerate(variances_percentage), key=lambda x: x[1], reverse=True)
print map(lambda x: isUniform(x[0]), sorted_variances[:60])
print "Number of uniform LBP in the 58 biggest variances components: %d"%np.sum(map(lambda x: isUniform(x[0]), sorted_variances[:58]))


[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, False, True, True, True, True]
Number of uniform LBP in the 58 biggest variances components: 56

In [12]:
%config InlineBackend


InlineBackend options
-------------------
InlineBackend.close_figures=<Bool>
    Current: True
    Close all figures at the end of each cell.
    When True, ensures that each cell starts with no active figures, but it also
    means that one must keep track of references in order to edit or redraw
    figures in subsequent cells. This mode is ideal for the notebook, where
    residual plots from other cells might be surprising.
    When False, one must call figure() to create new figures. This means that
    gcf() and getfigs() can reference figures created in other cells, and the
    active figure can continue to be edited with pylab/pyplot methods that
    reference the current active figure. This mode facilitates iterative editing
    of figures, and behaves most consistently with other matplotlib backends,
    but figure barriers between cells must be explicit.
InlineBackend.figure_format=<Unicode>
    Current: u''
    The figure format to enable (deprecated use `figure_formats` instead)
InlineBackend.figure_formats=<Set>
    Current: set(['png'])
    A set of figure formats to enable: 'png',  'retina', 'jpeg', 'svg', 'pdf'.
InlineBackend.print_figure_kwargs=<Dict>
    Current: {'bbox_inches': 'tight'}
    Extra kwargs to be passed to fig.canvas.print_figure.
    Logical examples include: bbox_inches, quality (for jpeg figures), etc.
InlineBackend.rc=<Dict>
    Current: {'font.size': 10, 'figure.figsize': (6.0, 4.0), 'figure.facecolor': (1, 1, 1, 0), 'savefig.dpi': 72, 'figure.subplot.bottom': 0.125, 'figure.edgecolor': (1, 1, 1, 0)}
    Subset of matplotlib rcParams that should be different for the inline
    backend.

In [16]:
cum_sum = np.cumsum(sorted(variances_percentage, reverse=True))
print len(cum_sum[cum_sum<0.98])
print len(cum_sum[cum_sum<0.9])
print cum_sum[58-1]

xlabel("Number of dimensions")
ylabel("Cumulative variance distribution")
plot(cum_sum)
xlim(xmax=255)


47
19
0.987506

In [1]:
indexes = (cum_sum < 0.99)
[(format(int(i), '08b'), int(i), x, cum) for (i,x),cum in zip(np.asarray(sorted_variances)[indexes], cum_sum[indexes])]


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-d08d35d9c44a> in <module>()
----> 1 indexes = (cum_sum < 0.99)
      2 [(format(int(i), '08b'), int(i), x, cum) for (i,x),cum in zip(np.asarray(sorted_variances)[indexes], cum_sum[indexes])]

NameError: name 'cum_sum' is not defined

PCA Analysis


In [5]:
import random
from sklearn.decomposition import PCA
pca = PCA()
descs = lbf_descs.reshape(lbf_descs.shape[0]*lbf_descs.shape[1], lbf_descs.shape[2])
indexes = random.sample(range(descs.shape[0]), 10000)
pca.fit(descs[indexes])


Out[5]:
PCA(copy=True, n_components=None, whiten=False)

In [10]:
cumsum = np.cumsum(pca.explained_variance_ratio_)
print len(cumsum[cumsum < 0.98])
print len(cumsum[cumsum < 0.9])
print cumsum[58-1]
plot(cumsum)
xlabel("Number of dimensions")
ylabel("Cumulative variance distribution")
xlim(xmax=255)
#axhline(0.98, color="r")
#axhline(0.9, color="g")


44
15
0.988643
Out[10]:
(0.0, 255)

Sparse PCA Analysis


In [22]:
import random
from sklearn.decomposition import SparsePCA
sparse_pca = SparsePCA(alpha=0.5)
descs = lbf_descs.reshape(lbf_descs.shape[0]*lbf_descs.shape[1], lbf_descs.shape[2])
indexes = random.sample(range(descs.shape[0]), 10000)
sparse_pca.fit(descs[indexes])


Out[22]:
SparsePCA(U_init=None, V_init=None, alpha=0.5, max_iter=1000, method='lars',
     n_components=None, n_jobs=1, random_state=None, ridge_alpha=0.01,
     tol=1e-08, verbose=False)

In [23]:
print np.where(sparse_pca.components_[0] != 0)


(array([224]),)

In [24]:
non_null_components = [list(np.where(sparse_pca.components_[i] != 0)[0]) for i in range(sparse_pca.components_.shape[0])]
print non_null_components


[[224], [14, 28, 30, 62], [56, 112, 120, 248], [131, 193, 195, 227], [7, 14, 15, 143], [193, 225, 227], [7, 131, 135, 143], [28, 56, 60, 62], [0], [], [112, 240, 248], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]

In [25]:
for component in non_null_components:
    output = ""
    for i in component:
        output += "(%d, %s) "%(i, str(isUniform(i)))
    if len(output) > 0:
        print output


(224, True) 
(14, True) (28, True) (30, True) (62, True) 
(56, True) (112, True) (120, True) (248, True) 
(131, True) (193, True) (195, True) (227, True) 
(7, True) (14, True) (15, True) (143, True) 
(193, True) (225, True) (227, True) 
(7, True) (131, True) (135, True) (143, True) 
(28, True) (56, True) (60, True) (62, True) 
(0, True) 
(112, True) (240, True) (248, True) 

In [26]:
var = np.var(sparse_pca.transform(descs), axis=0)

Uniform LBP Descriptor


In [11]:
ulbf_descs = np.load("descriptors/ulbp_lfwa.npy")
ulbf_descs = ulbf_descs.reshape((ulbf_descs.shape[0], ulbf_descs.shape[1]/59, 59))

variances = np.var(ulbf_descs, axis=(0,1))
variances_percentage = variances / np.sum(variances)
plot(variances_percentage)


Out[11]:
[<matplotlib.lines.Line2D at 0x7139510>]

In [12]:
print "Number of components over 1%% of variance: %d"%len(variances_percentage[variances_percentage>0.005])

cum_sum = np.cumsum(sorted(variances_percentage, reverse=True))
print len(cum_sum[cum_sum<0.99]), len(cum_sum[cum_sum<0.9]), len(variances_percentage[variances_percentage>0.01])

plot(cum_sum)


Number of components over 1% of variance: 22
46 18 18
Out[12]:
[<matplotlib.lines.Line2D at 0x7167190>]

In [13]:
pca = PCA()
descs = ulbf_descs.reshape(lbf_descs.shape[0]*lbf_descs.shape[1], lbf_descs.shape[2])
indexes = random.sample(range(descs.shape[0]), 10000)
pca.fit(descs[indexes])


Out[13]:
PCA(copy=True, n_components=None, whiten=False)

In [14]:
cumsum = np.cumsum(pca.explained_variance_ratio_)
print len(cumsum[cumsum < 0.98])
print len(cumsum[cumsum < 0.9])
plot(cumsum)


36
15
Out[14]:
[<matplotlib.lines.Line2D at 0x7311850>]

In [ ]: